% Yuan, Liu, Yang, Wang (2026), Scientific Data 13(1), 249.
% Reconstructed daily XCO2 dataset for China, 2016--2020 (details in `abstract`).
% Maintenance notes:
%   - `month` uses the predefined three-letter macro (unquoted) so the
%     bibliography style decides how the month is rendered.
%   - Braces in `title` protect "CO$_2$" and "China" from being lowercased by
%     styles that apply sentence casing to titles.
%   - `pages` holds the single value 249 (presumably an article number rather
%     than a page range -- confirm against the publisher record).
%   - `day` is not a standard BibTeX field; styles that do not know it ignore it.
@article{Yuan2026,
  author   = {Yuan, Zhengwu and Liu, Yang and Yang, Aixia and Wang, Dacheng},
  title    = {A high-resolution daily {CO$_2$} dataset for {China} (2016--2020)},
  journal  = {Scientific Data},
  year     = {2026},
  month    = jan,
  day      = {15},
  volume   = {13},
  number   = {1},
  pages    = {249},
  abstract = {High-resolution column-averaged dry-air CO$_2$ mole fraction (XCO$_2$) data are essential for characterizing carbon sources and sinks, advancing carbon cycle research, and supporting climate policy goals such as carbon peaking and carbon neutrality. However, current satellite retrievals are often spatially fragmented and temporally discontinuous due to cloud cover and aerosol interference. To address these limitations, this study utilizes an XGBoost model optimized via Bayesian optimization (XGBoost-BO) to construct a robust mapping relationship between atmospheric XCO$_2$ concentrations and multi-source auxiliary parameters. Crucially, the incorporation of the SHAP (SHapley Additive exPlanations) methodology enhances model interpretability, ensuring that the reconstruction captures physically meaningful spatiotemporal dynamics across China. The reconstructed XCO$_2$ dataset exhibits high consistency with OCO-2 satellite observations, achieving a coefficient of determination (R{\texttwosuperior}) of 0.98, a Root Mean Square Error (RMSE) of 0.58 ppm, and a Mean Absolute Percentage Error (MAPE) of 0.07{\%}. The model's reliability is further validated against ground-based TCCON measurements in China, achieving an R{\texttwosuperior} of 0.92 (RMSE{\thinspace}={\thinspace}1.16 ppm, MAPE{\thinspace}={\thinspace}0.2{\%}) at the Hefei site and an R{\texttwosuperior} of 0.70 (RMSE{\thinspace}={\thinspace}2.00 ppm, MAPE{\thinspace}={\thinspace}0.4{\%}) at the Xianghe site.},
  issn     = {2052-4463},
  doi      = {10.1038/s41597-026-06569-w},
  url      = {https://doi.org/10.1038/s41597-026-06569-w},
}